# ------------------------------------------------------------------
# Distribution of Legislative Power-Sharing Measure Plots Appendix E
# ------------------------------------------------------------------

# Packages
library(haven)
library(dplyr)

# -------------------------------
# Helpers
# -------------------------------

# Normalize to [0,1]
normalize01 <- function(x) {
  rng <- range(x, na.rm = TRUE)
  (x - rng[1]) / (rng[2] - rng[1])
}

# Plot histogram (grayscale) + median line; returns median
plot_distribution <- function(x, title = "Estimate of Legislative Power Sharing",
                              breaks = 20) {
  x <- x[is.finite(x)]
  med <- median(x, na.rm = TRUE)
  
  hist(x,
       breaks = breaks,
       col = "gray",
       main = "",
       xlab = title,
       border = "black")
  abline(v = med, col = "black", lwd = 2, lty = 2)  # grayscale median
  
  message(sprintf("Median (normalized): %.3f", med))
  invisible(med)
}

# -------------------------------
# 1) Boix & Svolik pipeline
# -------------------------------

# Inputs
bs_data_path   <- "leader_tvc_2.dta"
latent_path    <- "estimates_independent.csv"   # swap to static version as needed

# Load
bs_data <- read_dta(bs_data_path, encoding = "latin1") %>%
  mutate(COWcode = ccode)

latent  <- read.csv(latent_path)

# Restrict to overlap and make time0
df_restricted <- bs_data %>%
  semi_join(latent, by = c("COWcode", "year")) %>%
  arrange(COWcode, year, leadid) %>%
  group_by(leadid) %>%
  mutate(time0 = dplyr::lag(time, default = 0)) %>%
  ungroup()

# Merge and create lags/growth interaction
merged_bs <- merge(latent, bs_data, by = c("COWcode", "year")) %>%
  arrange(COWcode, year) %>%
  group_by(COWcode) %>%
  mutate(
    leg_growth_new   = dyn.estimates * growth_1,
    dyn.estimates_lag = dplyr::lag(dyn.estimates, 1),
    dyn.up_lag        = dplyr::lag(dyn.up, 1),
    dyn.lo_lag        = dplyr::lag(dyn.lo, 1)
  ) %>%
  ungroup()

# Filter to legislature==1 and normalize
df_new_leg_bs <- merged_bs %>%
  select(legislature, dyn.estimates) %>%
  filter(legislature == 1) %>%
  mutate(normalized = normalize01(dyn.estimates))

# Plot + median
cat("\n--- Boix & Svolik sample ---\n")
median_bs <- plot_distribution(df_new_leg_bs$normalized)

# -------------------------------
# 2) Kim & Sudduth pipeline
# -------------------------------

# Inputs
ks_data_path <- "kim_sudduth.dta"
# Reuse latent_path set above (swap to static/other file if needed)

# Load
ks_data <- read_dta(ks_data_path)

# Harmonize id for merge
ks_data <- ks_data %>%
  mutate(COWcode = if ("cowcode" %in% names(.)) cowcode else COWcode)

# Merge latent with original
data_new <- inner_join(latent, ks_data, by = c("COWcode", "year")) %>%
  arrange(COWcode, year) %>%
  group_by(COWcode) %>%
  mutate(
    dyn.estimates_lag = dplyr::lag(dyn.estimates, 1),
    dyn.up_lag        = dplyr::lag(dyn.up, 1),
    dyn.lo_lag        = dplyr::lag(dyn.lo, 1)
  ) %>%
  ungroup()

# Keep rows with lag present (to mirror your prior filtering)
data_new_exact <- data_new %>% filter(!is.na(dyn.estimates_lag))

# Filter to gwf_legislature==1 and normalize
df_leg_ks <- data_new_exact %>%
  filter(gwf_legislature == 1) %>%
  mutate(normalized = normalize01(dyn.estimates))

# Plot + median
cat("\n--- Kim & Sudduth sample ---\n")
median_ks <- plot_distribution(df_leg_ks$normalized)

# -------------------------------
# Output medians (console)
# -------------------------------
cat(sprintf("\nMedians (normalized):\n  Boix & Svolik:   %.3f\n  Kim & Sudduth:   %.3f\n",
            median_bs, median_ks))
